In [2]:
## Question 1
In [9]:
fig.show()
In [3]:
import pandas as pd
import plotly.express as px
import numpy as np
In [4]:
url = "https://raw.githubusercontent.com/bcaffo/MRIcloudT1volumetrics/master/inst/extdata/multilevel_lookup_table.txt"
multilevel_lookup = pd.read_csv(url, sep = "\t").drop(['Level5'], axis = 1)
multilevel_lookup = multilevel_lookup.rename(columns = {
    "modify"   : "roi", 
    "modify.1" : "level4",
    "modify.2" : "level3", 
    "modify.3" : "level2",
    "modify.4" : "level1"})
multilevel_lookup = multilevel_lookup[['roi', 'level4', 'level3', 'level2', 'level1']]
multilevel_lookup.head()
Out[4]:
roi level4 level3 level2 level1
0 SFG_L SFG_L Frontal_L CerebralCortex_L Telencephalon_L
1 SFG_R SFG_R Frontal_R CerebralCortex_R Telencephalon_R
2 SFG_PFC_L SFG_L Frontal_L CerebralCortex_L Telencephalon_L
3 SFG_PFC_R SFG_R Frontal_R CerebralCortex_R Telencephalon_R
4 SFG_pole_L SFG_L Frontal_L CerebralCortex_L Telencephalon_L
In [5]:
## Now load in the subject data
id = 127
subjectData = pd.read_csv("https://raw.githubusercontent.com/smart-stats/ds4bio_book/main/book/assetts/kirby21AllLevels.csv")
subjectData = subjectData.loc[(subjectData.type == 1) & (subjectData.level == 5) & (subjectData.id == id)]
subjectData = subjectData[['roi', 'volume']]
subjectData.head()

subjectData = pd.merge(subjectData, multilevel_lookup, on = "roi")
subjectData = subjectData.assign(icv = "ICV")
subjectData = subjectData.assign(comp = subjectData.volume / np.sum(subjectData.volume))
subjectData.head()
Out[5]:
roi volume level4 level3 level2 level1 icv comp
0 SFG_L 12926 SFG_L Frontal_L CerebralCortex_L Telencephalon_L ICV 0.009350
1 SFG_R 10050 SFG_R Frontal_R CerebralCortex_R Telencephalon_R ICV 0.007270
2 SFG_PFC_L 12783 SFG_L Frontal_L CerebralCortex_L Telencephalon_L ICV 0.009247
3 SFG_PFC_R 11507 SFG_R Frontal_R CerebralCortex_R Telencephalon_R ICV 0.008324
4 SFG_pole_L 3078 SFG_L Frontal_L CerebralCortex_L Telencephalon_L ICV 0.002227
In [6]:
import pandas as pd
import plotly.graph_objects as go
In [7]:
def genSankey(df,cat_cols=[],value_cols='',title='Sankey Diagram'):
  
    colorPalette = ['#46039f', '#9c179e','#e377c2','##17becf','#d62728']
    labelList = []
    colorNumList = []
    for catCol in cat_cols:
        labelListTemp =  list(set(df[catCol].values))
        colorNumList.append(len(labelListTemp))
        labelList = labelList + labelListTemp
        
    labelList = list(dict.fromkeys(labelList))
   
    colorList = []
    for idx, colorNum in enumerate(colorNumList):
        colorList = colorList + [colorPalette[idx]]*colorNum
        
    for i in range(len(cat_cols)-1):
        if i==0:
            sourceTargetDf = df[[cat_cols[i],cat_cols[i+1],value_cols]]
            sourceTargetDf.columns = ['source','target','count']
        else:
            tempDf = df[[cat_cols[i],cat_cols[i+1],value_cols]]
            tempDf.columns = ['source','target','count']
            sourceTargetDf = pd.concat([sourceTargetDf,tempDf])
        sourceTargetDf = sourceTargetDf.groupby(['source','target']).agg({'count':'sum'}).reset_index()
        
    sourceTargetDf['sourceID'] = sourceTargetDf['source'].apply(lambda x: labelList.index(x))
    sourceTargetDf['targetID'] = sourceTargetDf['target'].apply(lambda x: labelList.index(x))
    
    data = dict(
        type='sankey',
        node = dict(
          pad = 16,
          thickness = 15,
          line = dict(
            color = "black",
            width = 0.5
          ),
          label = labelList,
          color = colorList
        ),
        link = dict(
          source = sourceTargetDf['sourceID'],
          target = sourceTargetDf['targetID'],
          value = sourceTargetDf['count']
        )
      )
    
    layout =  dict(
        title = title,
        font = dict(
          size = 7
        )
    )
       
    fig = dict(data=[data], layout=layout)
    return fig
In [8]:
sank = genSankey(subjectData,cat_cols=['icv','level1','level2'],value_cols='comp',title='Type 1 for Levels 1 and 2')
fig = go.Figure(sank)